use strict;
use warnings;

###############################################################################################
# TO DO:
###############################################################################################
# - Try to make XPath work
###############################################################################################

use WWW::Mechanize;
use WWW::Mechanize::FormFiller;
use URI::URL;
use HTML::TreeBuilder;
#use HTML::TreeBuilder::XPath;
use Data::Dumper;

my $dir = 'data';
mkpath($dir) if ! -d $dir;
my $file_base = "$dir/boots_";
my $file_ext = ".html";

	# PAGE 1: http://www.boots.com/webapp/wcs/stores/servlet/StoreLocator?storeId=10052&catalogId=11051&langId=-1&errorViewName=StoreLookupView&requiredAction=locateNearestStores&displayView=StoreLookupResultsView&startIndex=0&currentPage=0&postcode=&town=london&submitLookupStore.x=38&submitLookupStore.y=7
	# PAGE 2: http://www.boots.com/webapp/wcs/stores/servlet/StoreLocator?selectedServiceIdsString=&requiredAction=locateNearestStores&catalogId=11051&errorViewName=StoreLookupResultsView&displayView=StoreLookupResultsView&postcode=&currentPage=1&langId=-1&town=london&startIndex=10&storeId=10052&cgiRequestType=placesCGI

#my $form_url = 'http://www.boots.com/webapp/wcs/stores/servlet/StoreLocator?requiredAction=displayStoreLookupPage&displayView=StoreLookupView&langId=-1&storeId=10052&catalogId=11051';
my $url = 'http://www.boots.com/webapp/wcs/stores/servlet/StoreLocator?storeId=10052&catalogId=11051&langId=-1&errorViewName=StoreLookupView&requiredAction=locateNearestStores&displayView=StoreLookupResultsView&startIndex=0&currentPage=0&postcode=&town=london&submitLookupStore.x=38&submitLookupStore.y=7';

my $startIndex = 0;
my $currentPage = 0;

my @list = ();

my $results_file = "results.tsv.txt"; 
open(RESULTS,'>',$results_file) || die "cant open $results_file for writing";
select RESULTS; $| = 1;
select STDOUT; $| = 1;

###########################################
# start
###########################################

foreach my $c (1..100) {
	warn "getting page $c...\n";

	my $file = $file_base . $currentPage . $file_ext;

	# download page
	&getPage( file => $file, url => $url );
	sleep 1;

	# parse page
	my $tree = HTML::TreeBuilder->new;
	$tree->parse_file($file);	

	# <script type="text/javascript">
	my @scripts = $tree->look_down('_tag', 'script', sub { defined $_[0] && defined $_[0]->attr('type') && $_[0]->attr('type') eq "text/javascript" });

	my $storelist;	
	foreach my $script (@scripts) {
		my $html = $script->as_HTML;
		#warn "script: " . $html;
		if ($html =~ m{MMIsSupportedBrowser}) {
			$storelist = $html;
			last;
		}
	}
	my @stores = split(/createMarker/, $storelist);

	shift @stores; # header row
	foreach my $store (@stores) {

		next unless $store =~ m{<div class="adr">};

		#my ($postcode) = $address =~ m/\b([A-PR-UWYZ][A-HK-Y0-9][A-HJKSTUW0-9]?[ABEHMNPRVWXY0-9]? {1,2}[0-9][ABD-HJLN-UW-Z]{2})\b/g;

		my ($lat) = $store =~ m{var latitude = "([^"]+?)"};
		my ($long) = $store =~ m{var longitude = "([^"]+?)"};

		my ($name) = $store =~ m{infoHtml \+=\s+'<h2>([^>]+?)</h2>'};
		my ($address1) = $store =~ m{<span class="address-line1">([^>]+?)<};
		my ($address2) = $store =~ m{<span class="address-line2">([^>]+?)<};
		my ($address3) = $store =~ m{<span class="address-line3">([^>]+?)<};
		my ($postcode) = $store =~ m{<span class="postal-code">([^>]+?)<};

		my ($phone) = $store =~ m{<div class="tel">[^>]+?<span class="key">Phone:</span>[^>]+?<span class="value">([^>]+?)</span>};

	#		warn "Checking: $store\n";

		my $item = {
			name => ($name || "null"),
			address1 => ($address1 || "null"),
			address2 => ($address2 || "null"),
			address3 => ($address3 || "null"),
			postcode => ($postcode || "null"),
			phone => ($phone || "null"),
			latitude => ($lat || "null"),
			longitude => ($long || "null"),
		};

	#		warn "found item: " . Dumper($item);

		push @list, $item;

		print "writing item: " . Dumper($item);
		print RESULTS join( "\t",
			$item->{name}, $item->{latitude}, $item->{longitude},
			$item->{address1}, $item->{address2}, $item->{address3},
			$item->{postcode}, $item->{phone}
		) . "\n";

	}

	# increment page
	$currentPage += 1;
	$url =~ s{currentPage=\d+}{currentPage=$currentPage};
	$startIndex += 10;
	$url =~ s{startIndex=\d+}{startIndex=$startIndex};

}	

###########################################
# finish
###########################################

close(RESULTS);


exit;






###########################################
# subs
###########################################
	
sub clean_up {
	my $s = shift;
	$s =~ s/^\s+//g;
	$s =~ s/\s+$//g;
	return $s;
}
	
	
sub getPage {
	my %p = @_;
	my $file = $p{file};
	my $url = $p{url};

	if (! -f $file) {
		warn "getting url\n";
		my $agent = WWW::Mechanize->new( autocheck => 1 );
		$agent->get($url);

		if (! $agent->success() ) {
			die "failed to get $url, stopping.\n";
		}

		warn "saving page\n";
		my $first_page = $agent->content;
		open(PAGE,'>',$file) || die "cant write to $file: $!";
		print PAGE $first_page;
		close(PAGE);
	}
	else {
		warn "reading local file $file\n";
	}
}
